#install.packages("plyr")
#install.packages("dplyr")
#install.packages("tidyr")
#install.packages("tidyverse")
#install.packages("psych")
#install.packages("ggpubr")
#install.packages("ggplot2")
#install.packages("plotly")
#install.packages("moments")
#install.packages('gmodels')
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange() masks plyr::arrange()
## ✖ purrr::compact() masks plyr::compact()
## ✖ dplyr::count() masks plyr::count()
## ✖ dplyr::failwith() masks plyr::failwith()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::id() masks plyr::id()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::mutate() masks plyr::mutate()
## ✖ dplyr::rename() masks plyr::rename()
## ✖ dplyr::summarise() masks plyr::summarise()
## ✖ dplyr::summarize() masks plyr::summarize()
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(ggpubr)
##
## Attaching package: 'ggpubr'
##
## The following object is masked from 'package:plyr':
##
## mutate
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(moments)
library(gmodels)
# Load the raw bank data. The path is machine-specific; adjust it when the
# script is run on another computer.
bank_unclean <- read.csv("~/Downloads/Archive (1)/bank.csv")
bank_unclean

# Sort the data frame by descending age.
bank_sort <- bank_unclean[order(-bank_unclean$age), ]
head(bank_sort)

# Drop the "default" column by name instead of by position (-5), so the step
# still removes the right column if the column order of the CSV changes.
bank_drop <- dplyr::select(bank_sort, -default)
head(bank_drop)

# Rename column "contact" to "Contact_Info".
names(bank_drop)[names(bank_drop) == "contact"] <- "Contact_Info"
head(bank_drop)

# Keep only rows 5022..6139 of the age-sorted data, taken in reverse order
# (6139:5022); everything above and below that slice is discarded.
# NOTE(review): the indices look like the 45% / 55% row positions of the input
# file -- confirm against nrow(bank_drop) if the data set changes.
bank <- bank_drop[6139:5022, ]

# Upper-case the three marital-status labels with gsub().
bank$marital <- gsub("single", "SINGLE", as.character(bank$marital))
bank$marital <- gsub("married", "MARRIED", as.character(bank$marital))
bank$marital <- gsub("divorced", "DIVORCED", as.character(bank$marital))
head(bank)
str(bank)
## 'data.frame': 1118 obs. of 16 variables:
## $ age : int 37 37 37 37 37 37 37 37 37 37 ...
## $ job : chr "management" "technician" "management" "management" ...
## $ marital : chr "SINGLE" "SINGLE" "MARRIED" "MARRIED" ...
## $ education : chr "tertiary" "tertiary" "tertiary" "tertiary" ...
## $ balance : int 102 0 156 0 480 443 0 4017 1113 4151 ...
## $ housing : chr "yes" "yes" "no" "no" ...
## $ loan : chr "no" "no" "no" "no" ...
## $ Contact_Info: chr "cellular" "cellular" "cellular" "cellular" ...
## $ day : int 6 23 19 15 22 29 8 30 2 30 ...
## $ month : chr "may" "jul" "nov" "jan" ...
## $ duration : int 445 366 366 426 344 1600 257 665 229 543 ...
## $ campaign : int 1 6 3 2 2 1 2 2 1 4 ...
## $ pdays : int 258 -1 -1 196 182 -1 97 196 182 -1 ...
## $ previous : int 2 0 0 1 8 0 1 1 1 0 ...
## $ poutcome : chr "failure" "unknown" "unknown" "other" ...
## $ deposit : chr "yes" "yes" "yes" "yes" ...
# Mean, minimum, maximum and standard deviation of balance for each age.
# across() replaces the superseded summarise_at(); .names = "{.fn}" keeps the
# output columns named Mean / Min / Max / STD.
bank %>%
  group_by(age) %>%
  summarise(across(balance,
                   list(Mean = mean, Min = min, Max = max, STD = sd),
                   .names = "{.fn}"))
# Mean, minimum, maximum and standard deviation of duration for each value of
# campaign.
bank %>%
  group_by(campaign) %>%
  summarise(across(duration,
                   list(Mean = mean, Min = min, Max = max, STD = sd),
                   .names = "{.fn}"))
# Rows whose balance is exactly 1.
subset(bank, balance == 1)
# Rows whose age lies in 20..40. `%in%` tests set membership; the earlier
# `age == 20:40` recycled the 21-element vector along the column and compared
# position by position, so rows were matched only by accident.
subset(bank, age %in% 20:40)
## (Recorded from the earlier `age == 20:40` form:)
## Warning in age == 20:40: longer object length is not a multiple of shorter
## object length
# Rows where campaign equals 10.
subset(bank, campaign == 10)
# First frequency table: education (rows) cross-counted against job (columns).
bank_table <- table(bank[["education"]], bank[["job"]])
bank_table
##
## admin. blue-collar entrepreneur housemaid management retired
## primary 6 78 3 9 5 0
## secondary 116 106 17 15 28 3
## tertiary 16 5 21 5 235 1
## unknown 4 12 1 0 9 0
##
## self-employed services student technician unemployed unknown
## primary 0 11 0 3 3 0
## secondary 18 107 2 138 24 0
## tertiary 13 7 4 65 12 3
## unknown 0 1 4 7 0 1
# Frequency table of deposit (rows) by age (columns), printed flat via ftable().
bank_Ftable2 <- table(bank[["deposit"]], bank[["age"]])
ftable(bank_Ftable2)
## 37 38 39 40
##
## no 0 209 200 210
## yes 150 144 143 62
# Frequency table of Contact_Info (rows) by marital status (columns), printed
# flat via ftable().
bank_Ftable3 <- table(bank[["Contact_Info"]], bank[["marital"]])
ftable(bank_Ftable3)
## DIVORCED MARRIED SINGLE
##
## cellular 90 487 197
## telephone 3 31 14
## unknown 49 177 70
# Cross-tabulation of education (rows) by job (columns).
# `dnn` names the row dimension first and the column dimension second, so it
# has to follow the argument order (education, job). The labels used to be
# swapped, which is why the recorded output below shows "Job" above the
# education rows and "Education" above the job columns.
banl_ct1 <- CrossTable(bank$education, bank$job,
                       dnn = c("Education", "Job"))
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 1118
##
##
## | Education
## Job | admin. | blue-collar | entrepreneur | housemaid | management | retired | self-employed | services | student | technician | unemployed | unknown | Row Total |
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
## primary | 6 | 78 | 3 | 9 | 5 | 0 | 0 | 11 | 0 | 3 | 3 | 0 | 118 |
## | 5.389 | 151.997 | 0.463 | 11.524 | 20.091 | 0.422 | 3.272 | 0.397 | 1.055 | 16.882 | 0.303 | 0.422 | |
## | 0.051 | 0.661 | 0.025 | 0.076 | 0.042 | 0.000 | 0.000 | 0.093 | 0.000 | 0.025 | 0.025 | 0.000 | 0.106 |
## | 0.042 | 0.388 | 0.071 | 0.310 | 0.018 | 0.000 | 0.000 | 0.087 | 0.000 | 0.014 | 0.077 | 0.000 | |
## | 0.005 | 0.070 | 0.003 | 0.008 | 0.004 | 0.000 | 0.000 | 0.010 | 0.000 | 0.003 | 0.003 | 0.000 | |
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
## secondary | 116 | 106 | 17 | 15 | 28 | 3 | 18 | 107 | 2 | 138 | 24 | 0 | 574 |
## | 25.474 | 0.076 | 0.966 | 0.001 | 91.729 | 0.436 | 0.273 | 27.672 | 1.913 | 7.502 | 0.790 | 2.054 | |
## | 0.202 | 0.185 | 0.030 | 0.026 | 0.049 | 0.005 | 0.031 | 0.186 | 0.003 | 0.240 | 0.042 | 0.000 | 0.513 |
## | 0.817 | 0.527 | 0.405 | 0.517 | 0.101 | 0.750 | 0.581 | 0.849 | 0.200 | 0.648 | 0.615 | 0.000 | |
## | 0.104 | 0.095 | 0.015 | 0.013 | 0.025 | 0.003 | 0.016 | 0.096 | 0.002 | 0.123 | 0.021 | 0.000 | |
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
## tertiary | 16 | 5 | 21 | 5 | 235 | 1 | 13 | 7 | 4 | 65 | 12 | 3 | 387 |
## | 22.362 | 59.936 | 2.872 | 2.529 | 201.837 | 0.107 | 0.480 | 30.739 | 0.084 | 1.034 | 0.167 | 1.885 | |
## | 0.041 | 0.013 | 0.054 | 0.013 | 0.607 | 0.003 | 0.034 | 0.018 | 0.010 | 0.168 | 0.031 | 0.008 | 0.346 |
## | 0.113 | 0.025 | 0.500 | 0.172 | 0.848 | 0.250 | 0.419 | 0.056 | 0.400 | 0.305 | 0.308 | 0.750 | |
## | 0.014 | 0.004 | 0.019 | 0.004 | 0.210 | 0.001 | 0.012 | 0.006 | 0.004 | 0.058 | 0.011 | 0.003 | |
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
## unknown | 4 | 12 | 1 | 0 | 9 | 0 | 0 | 1 | 4 | 7 | 0 | 1 | 39 |
## | 0.184 | 3.549 | 0.148 | 1.012 | 0.045 | 0.140 | 1.081 | 2.623 | 38.216 | 0.025 | 1.360 | 5.306 | |
## | 0.103 | 0.308 | 0.026 | 0.000 | 0.231 | 0.000 | 0.000 | 0.026 | 0.103 | 0.179 | 0.000 | 0.026 | 0.035 |
## | 0.028 | 0.060 | 0.024 | 0.000 | 0.032 | 0.000 | 0.000 | 0.008 | 0.400 | 0.033 | 0.000 | 0.250 | |
## | 0.004 | 0.011 | 0.001 | 0.000 | 0.008 | 0.000 | 0.000 | 0.001 | 0.004 | 0.006 | 0.000 | 0.001 | |
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
## Column Total | 142 | 201 | 42 | 29 | 277 | 4 | 31 | 126 | 10 | 213 | 39 | 4 | 1118 |
## | 0.127 | 0.180 | 0.038 | 0.026 | 0.248 | 0.004 | 0.028 | 0.113 | 0.009 | 0.191 | 0.035 | 0.004 | |
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
##
##
# Cross-tabulate age (rows) against housing (columns); `dnn` supplies the row
# and column dimension labels in that order.
banl_ct2 <- CrossTable(
  x = bank$age,
  y = bank$housing,
  dnn = c("Age", "Housing")
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 1118
##
##
## | Housing
## Age | no | yes | Row Total |
## -------------|-----------|-----------|-----------|
## 37 | 84 | 66 | 150 |
## | 6.547 | 4.818 | |
## | 0.560 | 0.440 | 0.134 |
## | 0.177 | 0.102 | |
## | 0.075 | 0.059 | |
## -------------|-----------|-----------|-----------|
## 38 | 147 | 206 | 353 |
## | 0.047 | 0.035 | |
## | 0.416 | 0.584 | 0.316 |
## | 0.310 | 0.320 | |
## | 0.131 | 0.184 | |
## -------------|-----------|-----------|-----------|
## 39 | 139 | 204 | 343 |
## | 0.284 | 0.209 | |
## | 0.405 | 0.595 | 0.307 |
## | 0.293 | 0.317 | |
## | 0.124 | 0.182 | |
## -------------|-----------|-----------|-----------|
## 40 | 104 | 168 | 272 |
## | 1.111 | 0.818 | |
## | 0.382 | 0.618 | 0.243 |
## | 0.219 | 0.261 | |
## | 0.093 | 0.150 | |
## -------------|-----------|-----------|-----------|
## Column Total | 474 | 644 | 1118 |
## | 0.424 | 0.576 | |
## -------------|-----------|-----------|-----------|
##
##
# Dodged bar chart: number of rows per marital status, split by education.
# (Axis label typo "Stauts" corrected.)
ggplot(data = bank) +
  geom_bar(mapping = aes(x = marital, fill = education), position = "dodge") +
  labs(title = "The Impact Of Marital On Education",
       x = "Marital Status", y = "Count")
# Dodged bar chart: number of rows per age, split by housing (cleaned data).
ggplot(data = bank) +
  geom_bar(mapping = aes(x = age, fill = housing), position = "dodge") +
  labs(title = "Housing At Different Ages", x = "Age", y = "count")
# Same chart on the data before cleaning, which covers a wider range of ages.
ggplot(data = bank_unclean) +
  geom_bar(mapping = aes(x = age, fill = housing), position = "dodge") +
  labs(title = "Housing At Different Ages-unclean data",
       x = "Age", y = "count")
# Dodged bar chart: number of rows per age, split by Contact_Info.
# geom_bar() puts the row count on the y-axis, so the axis is labelled "count"
# rather than "contact-info".
ggplot(data = bank) +
  geom_bar(mapping = aes(x = age, fill = Contact_Info), position = "dodge") +
  labs(title = "Different contact info based on different Ages",
       x = "Age", y = "count")
# Top 6 jobs by mean age.
# The comment and the plot title both say "top 6", but the code used to list
# ten rows and then box-plot every job; both now use the same six jobs.
bankrank <- bank %>%
  select(education, age, job)
Mostjob <- aggregate(age ~ job, bankrank, mean)
top_jobs <- head(Mostjob[order(-Mostjob$age), ], 6)
top_jobs
# Box plot of age for those six jobs only.
top_job_rows <- bankrank[bankrank$job %in% top_jobs$job, ]
boxplot(top_job_rows$age ~ top_job_rows$job,
        main = "Top 6 jobs based on different ages",
        xlab = "job", ylab = "age ", col = "purple")
# Box plot of age split by housing.
box1 <- boxplot(bank$age ~ bank$housing,
                main = "box plot of Housing based on different ages",
                xlab = "housing", ylab = "age ", col = "light green")
# Box plot of age split by loan.
box2 <- boxplot(bank$age ~ bank$loan,
                main = "box plot of getting loan based on different ages",
                xlab = "loan", ylab = "age ", col = "light pink")
# ggplot box plot of age for each Contact_Info value.
# The colour is a fixed setting, so it belongs in geom_boxplot(); inside aes()
# the string "red" is treated as data and mapped through the default colour
# scale (with a legend) instead of drawing red boxes.
ggplot(bank, aes(x = age, y = Contact_Info)) +
  geom_boxplot(color = "red")
# Plotly histogram of job counts, drawn in pink.
# The bar colour is set through `marker`; the earlier `color = ~"pink"` mapped
# a one-level variable through a colour palette, and `mode` is not a histogram
# attribute. Those two issues produced the warnings recorded below.
hist1 <- plot_ly(data = bank, x = ~job, type = "histogram",
                 marker = list(color = "pink")) %>%
  layout(title = "Job histogram plot", plot_bgcolor = "white")
hist1
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning: 'histogram' objects don't have these attributes: 'mode'
## Valid attributes include:
## '_deprecated', 'alignmentgroup', 'autobinx', 'autobiny', 'bingroup', 'cliponaxis', 'constraintext', 'cumulative', 'customdata', 'customdatasrc', 'error_x', 'error_y', 'histfunc', 'histnorm', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'nbinsx', 'nbinsy', 'offsetgroup', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textsrc', 'texttemplate', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'x', 'xaxis', 'xbins', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'yaxis', 'ybins', 'ycalendar', 'yhoverformat', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# For each loan group build a histogram of log(day) (traces named from the age
# column), then arrange the per-group plots in a single row with a shared
# x-axis.
group_by(bank, loan) %>%
  do(p = plot_ly(., x = ~log(day), name = ~age, type = "histogram")) %>%
  subplot(nrows = 1, shareX = TRUE, shareY = FALSE) %>%
  layout(
    title = 'Loan based on different ages and in different day',
    plot_bgcolor = "white"
  )
# For each age group build a histogram of log(age) (traces named from
# Contact_Info), then arrange the per-group plots in a single row with shared
# x- and y-axes.
group_by(bank, age) %>%
  do(p = plot_ly(., x = ~log(age), name = ~Contact_Info, type = "histogram")) %>%
  subplot(nrows = 1, shareX = TRUE, shareY = TRUE) %>%
  layout(
    title = 'Different contact info based on different ages',
    plot_bgcolor = "white"
  )
# Histogram of log(age) with its kernel density estimate overlaid on a second
# y-axis. The overlay is the density of log(age) -- education is not involved
# -- so the trace is named "density" and the title describes what is plotted.
bankdens <- density(log(bank$age))
plot_ly(data = bank,
        x = ~log(age),
        type = "histogram",
        name = "age") %>%
  add_lines(x = bankdens$x, y = bankdens$y, yaxis = "y2", name = "density") %>%
  layout(
    title = "Distribution of log(age) with density overlay",
    plot_bgcolor = "white",
    yaxis2 = list(
      overlaying = "y",     # draw the second axis on top of the first
      side = "right",       # put the density axis on the right-hand side
      rangemode = "tozero"  # start the density axis at zero
    )
  )
# Violin plot of log(campaign) against duration (positive side only), with the
# mean line shown.
# Colour is set through the violin's own attributes; the earlier
# `color = ~"pink"` mapped a one-level variable through a palette and caused
# the RColorBrewer warnings recorded below. `T` is spelled out as TRUE.
plot_ly(data = bank,
        x = ~duration,
        y = ~log(campaign),
        type = "violin",
        fillcolor = "pink",
        line = list(color = "pink"),
        side = "positive",
        meanline = list(visible = TRUE)) %>%
  layout(title = "relation between duration and campaign",
         plot_bgcolor = "white")
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# Violin plot of log(campaign) against age (negative side only), with the mean
# line shown.
# Colour is set through the violin's own attributes; the earlier
# `color = ~"pink"` mapped a one-level variable through a palette and caused
# the RColorBrewer warnings recorded below. `T` is spelled out as TRUE.
plot_ly(data = bank,
        x = ~age,
        y = ~log(campaign),
        type = "violin",
        fillcolor = "pink",
        line = list(color = "pink"),
        side = "negative",
        meanline = list(visible = TRUE)) %>%
  layout(title = "Relation between age and campaign in violin plot",
         plot_bgcolor = "white")
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# References
# Kabacoff, R. I. (2015). R in Action, Second Edition: Data analysis and graphics with R (2nd ed.). Manning Publications Co.
# Plotly Graphics Library (2021). Bar Charts in R. Plotly.com. https://plotly.com/r/barcharts/
# Quick-R by Datacamp (2017). Subsetting Data. Www.Statmethods.Net. https://www.statmethods.net/management/subset.html
# Stackoverflow (2016). R “Error: unexpected ‘}’ in”} “[duplicate]. Stackoverflow.com. https://stackoverflow.com/questions/40291675/r-error-unexpected-in
#install.packages("plyr")
#install.packages("dplyr")
#install.packages("tidyr")
#install.packages("tidyverse")
#install.packages("psych")
#install.packages("ggpubr")
#install.packages("ggplot2")
#install.packages("plotly")
#install.packages("moments")
#install.packages('gmodels')
library(plyr)
library(dplyr)
library(tidyr)
library(tidyverse)
library(psych)
library(ggpubr)
library(ggplot2)
library(plotly)
library(moments)
library(gmodels)
# Load the raw bank data. The path is machine-specific; adjust it when the
# script is run on another computer.
bank_unclean <- read.csv("~/Downloads/Archive (1)/bank.csv")
bank_unclean

# Sort the data frame by descending age.
bank_sort <- bank_unclean[order(-bank_unclean$age), ]
head(bank_sort)

# Drop the "default" column by name instead of by position (-5), so the step
# still removes the right column if the column order of the CSV changes.
bank_drop <- dplyr::select(bank_sort, -default)
head(bank_drop)

# Rename column "contact" to "Contact_Info".
names(bank_drop)[names(bank_drop) == "contact"] <- "Contact_Info"
head(bank_drop)

# Keep only rows 5022..6139 of the age-sorted data, taken in reverse order
# (6139:5022); everything above and below that slice is discarded.
# NOTE(review): the indices look like the 45% / 55% row positions of the input
# file -- confirm against nrow(bank_drop) if the data set changes.
bank <- bank_drop[6139:5022, ]

# Upper-case the three marital-status labels with gsub().
bank$marital <- gsub("single", "SINGLE", as.character(bank$marital))
bank$marital <- gsub("married", "MARRIED", as.character(bank$marital))
bank$marital <- gsub("divorced", "DIVORCED", as.character(bank$marital))
head(bank)
str(bank)
# Mean, minimum, maximum and standard deviation of balance for each age.
# across() replaces the superseded summarise_at(); .names = "{.fn}" keeps the
# output columns named Mean / Min / Max / STD.
bank %>%
  group_by(age) %>%
  summarise(across(balance,
                   list(Mean = mean, Min = min, Max = max, STD = sd),
                   .names = "{.fn}"))
# Mean, minimum, maximum and standard deviation of duration for each value of
# campaign.
bank %>%
  group_by(campaign) %>%
  summarise(across(duration,
                   list(Mean = mean, Min = min, Max = max, STD = sd),
                   .names = "{.fn}"))
# Rows whose balance is exactly 1.
subset(bank, balance == 1)
# Rows whose age lies in 20..40. `%in%` tests set membership; the earlier
# `age == 20:40` recycled the 21-element vector along the column and compared
# position by position, so rows were matched only by accident (and R warned
# about the mismatched lengths).
subset(bank, age %in% 20:40)
# Rows where campaign equals 10.
subset(bank, campaign == 10)
# First frequency table: education (rows) by job (columns).
bank_table <- table(bank$education, bank$job)
bank_table
# Frequency table of deposit (rows) by age (columns).
bank_Ftable2 <- table(bank$deposit, bank$age)
ftable(bank_Ftable2)
# Frequency table of Contact_Info (rows) by marital status (columns).
bank_Ftable3 <- table(bank$Contact_Info, bank$marital)
ftable(bank_Ftable3)
# Cross-tabulation of education (rows) by job (columns).
# `dnn` names the row dimension first and the column dimension second, so it
# has to follow the argument order (education, job); the labels used to be
# swapped, which printed "Job" above the education rows.
banl_ct1 <- CrossTable(bank$education, bank$job,
                       dnn = c("Education", "Job"))
# Cross-tabulation of age (rows) by housing (columns).
banl_ct2 <- CrossTable(bank$age, bank$housing,
                       dnn = c("Age", "Housing"))
# Dodged bar chart: number of rows per marital status, split by education.
# (Axis label typo "Stauts" corrected.)
ggplot(data = bank) +
  geom_bar(mapping = aes(x = marital, fill = education), position = "dodge") +
  labs(title = "The Impact Of Marital On Education",
       x = "Marital Status", y = "Count")
# Dodged bar chart: number of rows per age, split by housing (cleaned data).
ggplot(data = bank) +
  geom_bar(mapping = aes(x = age, fill = housing), position = "dodge") +
  labs(title = "Housing At Different Ages", x = "Age", y = "count")
# Same chart on the data before cleaning, which covers a wider range of ages.
ggplot(data = bank_unclean) +
  geom_bar(mapping = aes(x = age, fill = housing), position = "dodge") +
  labs(title = "Housing At Different Ages-unclean data",
       x = "Age", y = "count")
# Dodged bar chart: number of rows per age, split by Contact_Info.
# geom_bar() puts the row count on the y-axis, so the axis is labelled "count"
# rather than "contact-info".
ggplot(data = bank) +
  geom_bar(mapping = aes(x = age, fill = Contact_Info), position = "dodge") +
  labs(title = "Different contact info based on different Ages",
       x = "Age", y = "count")
# Top 6 jobs by mean age.
# The comment and the plot title both say "top 6", but the code used to list
# ten rows and then box-plot every job; both now use the same six jobs.
bankrank <- bank %>%
  select(education, age, job)
Mostjob <- aggregate(age ~ job, bankrank, mean)
top_jobs <- head(Mostjob[order(-Mostjob$age), ], 6)
top_jobs
# Box plot of age for those six jobs only.
top_job_rows <- bankrank[bankrank$job %in% top_jobs$job, ]
boxplot(top_job_rows$age ~ top_job_rows$job,
        main = "Top 6 jobs based on different ages",
        xlab = "job", ylab = "age ", col = "purple")
# Box plot of age split by housing.
box1 <- boxplot(bank$age ~ bank$housing,
                main = "box plot of Housing based on different ages",
                xlab = "housing", ylab = "age ", col = "light green")
# Box plot of age split by loan.
box2 <- boxplot(bank$age ~ bank$loan,
                main = "box plot of getting loan based on different ages",
                xlab = "loan", ylab = "age ", col = "light pink")
# ggplot box plot of age for each Contact_Info value.
# The colour is a fixed setting, so it belongs in geom_boxplot(); inside aes()
# the string "red" is treated as data and mapped through the default colour
# scale (with a legend) instead of drawing red boxes.
ggplot(bank, aes(x = age, y = Contact_Info)) +
  geom_boxplot(color = "red")
# Plotly histogram of job counts, drawn in pink.
# The bar colour is set through `marker`; the earlier `color = ~"pink"` mapped
# a one-level variable through a colour palette (RColorBrewer warnings), and
# `mode` is not a histogram attribute (plotly warned about it).
hist1 <- plot_ly(data = bank, x = ~job, type = "histogram",
                 marker = list(color = "pink")) %>%
  layout(title = "Job histogram plot", plot_bgcolor = "white")
hist1
# For each loan group build a histogram of log(day) (traces named from the age
# column), then arrange the per-group plots in one row with a shared x-axis.
bank %>%
  group_by(loan) %>%
  do(p = plot_ly(., x = ~log(day), name = ~age, type = "histogram")) %>%
  subplot(nrows = 1, shareX = TRUE, shareY = FALSE) %>%
  layout(title = "Loan based on different ages and in different day",
         plot_bgcolor = "white")
# For each age group build a histogram of log(age) (traces named from
# Contact_Info), then arrange the per-group plots in one row with shared axes.
bank %>%
  group_by(age) %>%
  do(p = plot_ly(., x = ~log(age), name = ~Contact_Info, type = "histogram")) %>%
  subplot(nrows = 1, shareX = TRUE, shareY = TRUE) %>%
  layout(title = "Different contact info based on different ages",
         plot_bgcolor = "white")
# Histogram of log(age) with its kernel density estimate overlaid on a second
# y-axis. The overlay is the density of log(age) -- education is not involved
# -- so the trace is named "density" and the title describes what is plotted.
bankdens <- density(log(bank$age))
plot_ly(data = bank,
        x = ~log(age),
        type = "histogram",
        name = "age") %>%
  add_lines(x = bankdens$x, y = bankdens$y, yaxis = "y2", name = "density") %>%
  layout(
    title = "Distribution of log(age) with density overlay",
    plot_bgcolor = "white",
    yaxis2 = list(
      overlaying = "y",     # draw the second axis on top of the first
      side = "right",       # put the density axis on the right-hand side
      rangemode = "tozero"  # start the density axis at zero
    )
  )
# Violin plot of log(campaign) against duration (positive side only), with the
# mean line shown. Colour is set through the violin's own attributes instead of
# `color = ~"pink"`, which mapped a one-level variable through a palette and
# raised RColorBrewer warnings. `T` is spelled out as TRUE.
plot_ly(data = bank,
        x = ~duration,
        y = ~log(campaign),
        type = "violin",
        fillcolor = "pink",
        line = list(color = "pink"),
        side = "positive",
        meanline = list(visible = TRUE)) %>%
  layout(title = "relation between duration and campaign",
         plot_bgcolor = "white")
# Violin plot of log(campaign) against age (negative side only), with the mean
# line shown; colour handled the same way as above.
plot_ly(data = bank,
        x = ~age,
        y = ~log(campaign),
        type = "violin",
        fillcolor = "pink",
        line = list(color = "pink"),
        side = "negative",
        meanline = list(visible = TRUE)) %>%
  layout(title = "Relation between age and campaign in violin plot",
         plot_bgcolor = "white")
##